// Package util contains some common functions of GO_SPIDER project.
package util

import (
	"net/url"
	"os"
	"path"
	"regexp"
	"strings"
)

// domainRegex is the source of a regular expression that recognises text
// beginning with a host name: one or more dot-terminated labels made of word
// characters or hyphens, followed by one of the listed domain suffixes. The
// pattern is anchored at the start of the text only.
const domainRegex = `^([\w-]+\.)+((com)|(net)|(org)|(gov\.cn)|(info)|(cc)|(com\.cn)|(net\.cn)|(org\.cn)|(name)|(biz)|(tv)|(cn)|(mobi)|(name)|(sh)|(ac)|(io)|(tw)|(com\.tw)|(hk)|(com\.hk)|(ws)|(travel)|(us)|(tm)|(la)|(me\.uk)|(org\.uk)|(ltd\.uk)|(plc\.uk)|(in)|(eu)|(it)|(jp))`

// jsonpBareKey matches an object key that is not wrapped in double quotes,
// together with the colon that follows it. It is compiled once at package
// initialisation instead of on every JsonpToJson call.
var jsonpBareKey = regexp.MustCompile(`([^\s\:\{\,\d"]+|[a-z][a-z\d]*)\s*\:`)

// JsonpToJson converts a JSONP string into a JSON string.
// Example: forbar({a:"1",b:2}) to {"a":"1","b":2}
//
// The padding is removed by keeping only the text between the first opening
// bracket ("{" or "[", whichever comes first) and the last matching closing
// bracket. If no such pair is found, the input is left unstripped. Every
// `\'` sequence is then deleted (the apostrophe is dropped together with the
// backslash), and bare object keys are wrapped in double quotes. Text inside
// double-quoted string literals is never rewritten, so values that contain a
// colon, such as URLs, are preserved.
func JsonpToJson(json string) string {
	start := strings.Index(json, "{")
	end := strings.LastIndex(json, "}")
	// The payload is an array when "[" appears before any "{", or when
	// there is no "{" at all.
	if arr := strings.Index(json, "["); arr >= 0 && (start < 0 || arr < start) {
		start = arr
		end = strings.LastIndex(json, "]")
	}
	if start >= 0 && end > start {
		json = json[start : end+1]
	}
	json = strings.Replace(json, "\\'", "", -1)
	return quoteBareKeys(json)
}

// quoteBareKeys applies jsonpBareKey to the parts of s that lie outside
// double-quoted string literals and copies the literals through unchanged.
func quoteBareKeys(s string) string {
	var b strings.Builder
	b.Grow(len(s) + 16)
	segStart := 0
	for i := 0; i < len(s); i++ {
		if s[i] != '"' {
			continue
		}
		// Rewrite the code segment that precedes this string literal.
		b.WriteString(jsonpBareKey.ReplaceAllString(s[segStart:i], "\"$1\":"))

		// Find the closing quote, stepping over backslash escapes.
		j := i + 1
		for j < len(s) && s[j] != '"' {
			if s[j] == '\\' {
				j++
			}
			j++
		}
		if j >= len(s) {
			// Unterminated literal: keep the remainder untouched.
			b.WriteString(s[i:])
			return b.String()
		}
		b.WriteString(s[i : j+1])
		segStart = j + 1
		i = j
	}
	b.WriteString(jsonpBareKey.ReplaceAllString(s[segStart:], "\"$1\":"))
	return b.String()
}

// GetWDPath returns the work directory path, which is read from the GOPATH
// environment variable. It panics when GOPATH is unset or empty.
func GetWDPath() string {
	if gopath := os.Getenv("GOPATH"); gopath != "" {
		return gopath
	}
	panic("GOPATH is not setted in env.")
}

// domainPattern is domainRegex compiled once at package initialisation, so
// NormalizeUrl does not recompile it on every call.
var domainPattern = regexp.MustCompile(domainRegex)

// NormalizeUrl completes a relative address into an absolute one.
//
// innerUrl is a link as it appears in a document and baseUrl is the address
// of that document. The result has the form scheme://host/path[?query]: the
// fragment is dropped and a single trailing "/" is removed from the path.
//
// The empty string is returned when innerUrl is "", "/", "javascript:void(0)"
// or starts with "#", when either URL cannot be parsed, or when the resolved
// scheme is neither http nor https (for example mailto: links).
//
// A scheme-less innerUrl that starts with a host name matching domainRegex
// (for example "www.domain.com/xx") is treated as a host and given the scheme
// of baseUrl. baseUrl is only parsed when it is needed to complete innerUrl.
func NormalizeUrl(innerUrl string, baseUrl string) string {
	switch innerUrl {
	case "", "/", "#", "javascript:void(0)":
		return ""
	}
	// A fragment-only reference points back into the current document, so it
	// is skipped in the same way as a bare "#".
	if strings.HasPrefix(innerUrl, "#") {
		return ""
	}

	target, err := resolveInnerURL(innerUrl, baseUrl)
	if err != nil {
		return ""
	}

	// Only http and https are supported; the fragment is ignored.
	switch strings.ToLower(target.Scheme) {
	case "http", "https":
	default:
		return ""
	}

	// The decoded path is emitted as-is, without re-escaping.
	normalized := target.Scheme + "://" + target.Host + strings.TrimSuffix(target.Path, "/")
	if target.RawQuery != "" {
		normalized += "?" + target.RawQuery
	}
	return normalized
}

// resolveInnerURL parses innerUrl and, when it carries no scheme of its own,
// completes it with the scheme, host and directory of baseUrl.
func resolveInnerURL(innerUrl, baseUrl string) (*url.URL, error) {
	// Bare host such as "www.domain.com/xx": only the scheme is missing.
	// This is checked before parsing because url.Parse would read the host
	// of "www.domain.com:8080/xx" as a scheme.
	if domainPattern.MatchString(innerUrl) {
		base, err := url.Parse(baseUrl)
		if err != nil {
			return nil, err
		}
		return url.Parse(base.Scheme + "://" + innerUrl)
	}

	ref, err := url.Parse(innerUrl)
	if err != nil {
		return nil, err
	}
	if ref.Scheme != "" {
		// Already absolute; the caller decides whether the scheme is usable.
		return ref, nil
	}

	base, err := url.Parse(baseUrl)
	if err != nil {
		return nil, err
	}
	resolved := &url.URL{Scheme: base.Scheme, Host: base.Host, RawQuery: ref.RawQuery}
	switch {
	case ref.Host != "":
		// //host/xx
		resolved.Host = ref.Host
		resolved.Path = ref.Path
	case strings.HasPrefix(ref.Path, "/"):
		// /xx
		resolved.Path = ref.Path
	case ref.Path == "":
		// ?query only: same document, different query.
		resolved.Path = base.Path
	default:
		// xx, xx/xx.html, ../xx: relative to the directory of the base
		// document. path.Join also removes "." and ".." segments; the
		// leading "/" keeps the result rooted when the base path is empty.
		resolved.Path = path.Join("/", path.Dir(base.Path), ref.Path)
	}
	return resolved, nil
}

// IsDirExists reports whether path exists and is a directory.
//
// It returns false when os.Stat fails for any reason, including when the
// path does not exist.
func IsDirExists(path string) bool {
	fi, err := os.Stat(path)
	if err != nil {
		// Without a FileInfo there is nothing to show that path is a
		// directory, so every Stat failure is reported as false.
		return false
	}
	return fi.IsDir()
}

// IsFileExists reports whether path exists and is not a directory.
//
// It returns false when os.Stat fails for any reason, including when the
// path does not exist.
func IsFileExists(path string) bool {
	fi, err := os.Stat(path)
	if err != nil {
		// Without a FileInfo there is nothing to show that path is a file,
		// so every Stat failure is reported as false.
		return false
	}
	return !fi.IsDir()
}

// IsNum reports whether a is a non-empty string made up only of the ASCII
// digits 0-9. Signs, decimal points, whitespace and non-ASCII digits all
// make it return false.
func IsNum(a string) bool {
	if a == "" {
		return false
	}
	// A byte scan gives the same answer as matching ^\d+$ without compiling
	// a regular expression on every call.
	for i := 0; i < len(a); i++ {
		if a[i] < '0' || a[i] > '9' {
			return false
		}
	}
	return true
}
// main is a manual smoke check: it resolves a sample relative image path
// against a sample stylesheet URL and prints the input followed by the
// result.
func main() {
	const pageURL = "http://www.tuozhe8.com/content/css/style.css"
	relative := "../img/img.jpg"
	absolute := NormalizeUrl(relative, pageURL)
	println(relative)
	println(absolute)
}
